//replicate KKPS estimation



/*	globals for adjustments [switches: 1 = on, any other value = off] --- */
global	nuclear = 1								/* SWITCH: 1 = restrict the sample to nuclear families (enables the marstat == -1 and no_nuc drops) */
global	sstax = 1								/* SWITCH: 1 = the variable sstax enters the effective tax (efftax) in addition to ttax and ptrans */
global	childage25 = 1							/* SWITCH: 1 = nchild counts the head's children aged 25 or younger (nchild25); otherwise the under-18 count nchild18 is used */
global	smissing = 0							/* SWITCH: 1 = observations surrounded by missing years are excluded from the income regressions (not referenced in this part of the file) */
global	nbins = 10								/* number of bins */
global	equalbins = 1							/* SWITCH: 1 = equally sized bins; otherwise unequally sized bins, in which case bincat must be specified accordingly */
global	bincat = "5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95"		/* percentile cut points; must correspond to nbins. NO ERROR IS RAISED IF nbins IS TOO SMALL (2 cut points need 3 bins, 3 cut points need 4 bins, ...). NOTE(review): 19 cut points are listed here, which would need 20 bins while nbins is 10 - presumably ignored while equalbins is 1; confirm */
/*	--------------------------------------------------------------------- *//* example: bincat = "(10,20)" -> bin 1: 0th to 10th percentile, bin 2: 10th to 20th percentile, bin 3: 20th to 100th percentile; hence n cut points need (n+1) bins */



/* 	globals for data access --------------------------------------------- */
* wave prefixes of the pequiv files and, in the same order, the two-digit
* survey years used as variable suffixes
global	waves "l m n o p q r s t u v w x y z ba bb bc bd be"
global	years "95 96 97 98 99 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14"

* store the n-th entry of the years list in global y<n> (y1 = 95, y2 = 96, ...)
local	j = 1
foreach yr of global years {
	global	y`j' "`yr'"
	local	++j
	}
/* 	--------------------------------------------------------------------- */


/*	obtain data from pequiv --------------------------------------------- */ /* https://www.diw.de/documents/publikationen/73/diw_01.c.407395.de/diw_datadoc_2012-065.pdf */
* For each wave: read the required variables from that wave's pequiv file,
* rename them to wave-independent names, add a four-digit year and save the
* result as one master file per year.
local	j = 1
foreach	w of global waves {
	* two-digit year belonging to wave number j
	local	yy "${y`j'}"

	use	`w'hhnr d11102ll ///
		d11101`yy' d11104`yy' d11105`yy' d11106`yy' d11107`yy' ///
		i11101`yy' i11103`yy' i11107`yy' i11109`yy' i11110`yy' i11112`yy' ///
		w11102`yy' y11101`yy' renty`yy' ///
		using "$datadir/`w'pequiv.dta", clear

	* household id and gender carry no year suffix
	rename	`w'hhnr   hhnr
	rename	d11102ll  gender

	* demographics
	rename	d11101`yy' age
	rename	d11104`yy' marstat
	rename	d11105`yy' head
	rename	d11106`yy' size
	rename	d11107`yy' nchild18

	* income, transfers and taxes
	rename	i11101`yy' inc
	rename	i11103`yy' linc
	rename	i11107`yy' ptrans
	rename	i11109`yy' ttax
	rename	i11110`yy' ilinc
	rename	i11112`yy' sstax

	* weight, price index, rental income
	rename	w11102`yy' hweight
	rename	y11101`yy' cpi
	rename	renty`yy'  rentinc

	* the first five waves are 95-99 (1900s), all later waves are 2000 onwards
	local	century = cond(`j' > 5, 2000, 1900)
	gen	year = `yy' + `century'

	save	"$savedir/master`yy'.dta",replace
	local	++j
	}
	
* Stack the yearly master files into one data set. The first file in the years
* list is opened with -use- and every later one is appended. A first-iteration
* flag is used instead of testing for the literal year 95, so the data left in
* memory by the preceding loop can never be appended to by accident when the
* years list starts with a different year.
local	first = 1
foreach yr of global years {
	if	`first' {
		use	"$savedir/master`yr'.dta", clear
		local	first = 0
		}
	else	{
		append	using "$savedir/master`yr'.dta"
		}
	}









// use	hid syear d11102ll d11101 d11104 d11105 ///
// 			d11106 d11107 i11101 i11103 i11107 ///
// 			i11109 i11110 i11112 w11102 y11101 ///
// 			renty using "$datadir\pequiv.dta", clear
//
// keep if inrange(syear,1995,2014)
// ren syear year
//
// rename  hid      	hhnr
// rename	d11102ll 	gender
// rename	d11101 		age
// rename	d11104 		marstat
// rename	d11105 		head
// rename	d11106 		size
// rename	d11107 		nchild18
// rename 	i11101 		inc
// rename	i11103 		linc
// rename	i11107 		ptrans
// rename	i11109 		ttax
// rename	i11110 		ilinc		
// rename 	i11112 		sstax
// rename	w11102 		hweight
// rename	y11101 		cpi
// rename	renty  		rentinc




/*	generate var I ------------------------------------------------------ */
*	number of children
* Flag persons recorded as child of the head (head == 3) by age, then count
* them within each household-year. The under-18 count (nchild18) comes with
* the data and is not rebuilt here.
gen	child25 = (head == 3 & age <= 25)					/* child of head aged 25 or younger */
gen	child14 = (head == 3 & age < 14)					/* child of head younger than 14 */

bysort hhnr year: egen nchild25 = total(child25)				/* only children of the head are counted */
bysort hhnr year: egen nchild14 = total(child14)				/* only children of the head are counted */

* nchild is the child count used from here on: nchild18 unless childage25 is on
gen	nchild = nchild18
if	"$childage25" == "1" {
	replace	nchild = nchild25
	}

*	number of earners
* Missing values compare as larger than any number, so ilinc > 0 alone would
* count household members with missing labor income as earners.
bysort hhnr year: egen earner = total(ilinc > 0 & !missing(ilinc))

*	married
gen	married = (marstat == 1)
/*	--------------------------------------------------------------------- */


/*	clear data set I ---------------------------------------------------- */
* Keep only household heads (head == 1) who are at least 25 years old and whose
* number of children is not coded -1 (no information available).
drop if head != 1 | age < 25 | nchild == -1
*drop if age > 64								/* drop if too old */

* the nuclear-family analysis additionally needs a known marital status
if	"$nuclear" == "1" {
	drop if marstat == -1
	}
/*	--------------------------------------------------------------------- */


/*	generate var II  ---------------------------------------------------- */
*	no nuclear family dummy and size
* household members who are not children (as counted in nchild)
gen	size_ad = size - nchild
* household members who do not earn
gen	nonearner = size - earner

if	"$nuclear" == "1" {
	* A household is flagged as non-nuclear when it has three or more
	* non-child members, when the head is married but is the only non-child
	* member, or when the head is not married and the number of non-child
	* members differs from one.
	gen	no_nuc = (size_ad >= 3) | (marstat == 1 & size_ad == 1) | (marstat != 1 & size_ad != 1)
	}

*	taxes
* effective tax: taxes paid (ttax) net of transfers received (ptrans)
gen	efftax = ttax - ptrans
if	"$sstax" == "1" {
	* NOTE(review): sstax is subtracted here, which LOWERS efftax and raises
	* net income. This looks right only if ttax already contains social
	* security taxes and the switch is meant to take them out again; if the
	* switch is meant to add them to the tax burden the sign is wrong. Confirm
	* against the definitions of i11109 and i11112 in the pequiv documentation.
	replace efftax = efftax - sstax
	}
* net incomes are built from nominal values here; deflation by cpi happens
* later for gross and net incomes alike
gen	netlinc  = linc - efftax
gen	netinc = inc - efftax

*	age group
* age1-age4 are ten-year bands: 25-34, 35-44, 45-54 and 55-64
forvalues g = 1/4 {
	local	lo = 15 + 10 * `g'
	local	hi = `lo' + 9
	gen	age`g' = inrange(age, `lo', `hi')
	}
* age5 is the open-ended band from 65 upwards
gen	age5 = (age >= 65)

*	income, agesquare
* landlord dummy: positive rental income; left missing where rentinc is missing
gen	landlord = (rentinc > 0) if rentinc != .

* express gross and net incomes in real terms (cpi base year = 100)
foreach v of varlist linc inc netlinc netinc {
	replace	`v' = `v' * (100 / cpi)
	}

* logs of the real incomes
gen	llinc = log(linc)							/* log real labor income */
gen	loginc = log(inc)							/* log real gross hh income */
gen	lnetlinc = log(netlinc)							/* log real net labor income */
gen	lognetinc = log(netinc)							/* log real net hh income */

* squared age
gen	sqage = age^2
/*	--------------------------------------------------------------------- */


/*	clear data set II --------------------------------------------------- */
* keep only observations with strictly positive gross and net labor income
drop if linc <= 0 | netlinc <= 0

* with the nuclear switch on, remove the households flagged as non-nuclear
if	"$nuclear" == "1" {
	drop if	no_nuc == 1
	}
/*	--------------------------------------------------------------------- */


/*	generate var III ---------------------------------------------------- */
* notrans marks observations that have no directly adjacent year of the same
* household around them. All explanatory comments sit on their own lines so
* that no comment has to be opened or closed on a /// continuation line.
sort	hhnr year

* first (min) and last (max) observed year of each household within each of
* the age groups 1-4; every observation belongs to at most one group, so the
* row total picks the value of its own group
* NOTE(review): age group 5 (65+) is not covered by this loop although such
* observations are not dropped above; for them min and max end up as 0, so
* the start/end rules below never apply to them - confirm this is intended
forvalues i = 1(1)4 {
	by	hhnr:	egen max`i' = max(year) if age`i' == 1
	by	hhnr:	egen min`i' = min(year) if age`i' == 1
	}
egen	max = rowtotal(max*)
egen	min = rowtotal(min*)

* interior observation (neither first nor last year of its age group): marked
* if the previous and the next row both belong to the same household and
* neither of them is the directly adjacent year
gen 	notrans = ((year - year[_n-1] != 1) & (year - year[_n+1] != -1) & ///
	year != min & year != max) & (hhnr - hhnr[_n-1] == 0) & ///
	(hhnr - hhnr[_n+1] == 0)

* first year of an age group: marked if the next row is the same household
* but not the following year
replace notrans = 1 if year == min & (year - year[_n+1] != -1) & ///
	(hhnr - hhnr[_n+1] == 0)

* last year of an age group: marked if the previous row is the same household
* but not the preceding year
replace notrans = 1 if year == max & (year - year[_n-1] != 1) & ///
	(hhnr - hhnr[_n-1] == 0)

drop	max* min*
/*	--------------------------------------------------------------------- */



/*	tax and income regression, residuals, bins, hh size ----------------- */
* one dummy per year (ydum1, ydum2, ...)
qui tab	year, gen(ydum)

* bin category, initialised to 1 for every observation
gen	z = 1

* number of bins and number of bins minus one
local	nbins_0 = $nbins
local	nbins_1 = $nbins - 1

* 5 x 1 matrices of missings, to be filled with averages:
*   s      hh size               sad     adults in hh
*   sch    children in hh        searn   earners in hh
*   snearn non-earners in hh     sch14   children below 14 in hh
foreach m in s sad sch searn snearn sch14 {
	mat	`m' = J(5,1,.)
	}

* Store the prepared data set. A forward slash is used as path separator, as
* everywhere else in this file; it works on Windows, macOS and Linux, whereas
* a backslash is only a valid separator on Windows.
save "$datadir/kkps_dat.dta", replace



